#4-24-10 - Michael Barnett
#NAMCS Analysis for data from namcs_repl3.R
#Will look at several different cardiovascular disease measures by different patient and physician
#characteristics

library(survey)
	#Adjusting for sample design in survey package, this is why all the warnings don't crash the package
	options(survey.lonely.psu="remove")
	options(survey.ultimate.cluster=TRUE)
library(Zelig)
library(MatchIt)
library(Amelia)
library(cem)
library(ggplot2)

##########
#Section 1: Functions for analysis
##########

#This is a silly function to process the subsetted NAMCS dataframe for setx() to work properly
	qm_factor <- function (df, type="factor"){
		if (type=="factor"){
			df$payment <- as.factor(df$payment)
			df$age.cat <- as.factor(df$age.cat)
			df$numchron <- as.factor(df$numchron)
			df$demog <- as.factor(df$demog)
			df$escrip <- as.factor(df$escrip)
			df$etest <- as.factor(df$etest)
			df$enotes <- as.factor(df$enotes)
			df$result <- as.factor(df$result)
			df$remind <- as.factor(df$remind)
			df$gp <- as.factor(df$gp)
			df$solo <- as.factor (df$solo)
			df$urban <- as.factor(df$urban)
			df$white <- as.factor (df$white)
			df$male <- as.factor(df$male)	
			df$office <- as.factor(df$office)
			df$owner <- as.factor(df$owner)
			df$private <- as.factor(df$private)
			df$EHR <- as.factor(df$EHR)
			}
		if (type=="refactor"){
			df$payment <- as.numeric(df$payment)
			df$age.cat <- as.numeric(df$age.cat)
			df$numchron <- as.numeric(df$numchron)
			df$demog <- as.numeric(df$demog)
			df$escrip <- as.numeric(df$escrip)
			df$etest <- as.numeric(df$etest)
			df$enotes <- as.numeric(df$enotes)
			df$result <- as.numeric(df$result)
			df$remind <- as.numeric(df$remind)
			df$gp <- as.numeric(df$gp)
			df$solo <- as.numeric (df$solo)
			df$urban <- as.numeric(df$urban)
			df$white <- as.numeric (df$white)
			df$male <- as.numeric(df$male)
			df$office <- as.numeric(df$office)
			df$owner <- as.numeric(df$owner)
			df$private <- as.numeric(df$private)
			
			df$payment <- as.factor(df$payment)
			df$age.cat <- as.factor(df$age.cat)
			df$numchron <- as.factor(df$numchron)
			df$demog <- as.factor(df$demog)
			df$escrip <- as.factor(df$escrip)
			df$etest <- as.factor(df$etest)
			df$enotes <- as.factor(df$enotes)
			df$result <- as.factor(df$result)
			df$remind <- as.factor(df$remind)
			df$gp <- as.factor(df$gp)
			df$solo <- as.factor (df$solo)
			df$urban <- as.factor(df$urban)
			df$white <- as.factor (df$white)
			df$male <- as.factor(df$male)
			df$office <- as.factor(df$office)
			df$owner <- as.factor(df$owner)
			df$private <- as.factor(df$private)
			}	
			return(df)
		}
	#Function to output multivariate estimates + SE
	#"qi" = quantity of interest
	#This function takes a Zelig model and outputs the expected values
	#for compliance with quality measure in EHR and non-EHR visits
	#for the "modal" visit
logit_qi <- function(model, treat, no_treat, sims=10000){
	
			no_treat_mean <- mean(sim(model, x=no_treat, nums=sims)$qi$ev)
			no_treat_sd <- sd(sim(model, x=no_treat, nums=sims)$qi$ev)
		
			treat_mean <- mean(sim(model, x=treat, nums=sims)$qi$ev)
			treat_sd <- sd(sim(model, x=treat, nums=sims)$qi$ev)
			
			
		results <- data.frame(mean=c(no_treat_mean, treat_mean), sd=c(no_treat_sd, treat_sd))
			rownames(results) <- c("No Treatment", "Treatment")
		return(results)				
}		
		
#################
#Section 2: Analysis
#################

#Quality measures:
	#Blood pressure management:
	#	Numerator (denominator): htn.measure (htn.visits), htn.dm (dm.visits), htn.ivd (ivd.visits), htn.control.all (all.visits)
	#Cardiovascular disease management:
	#	Numerator (denominator): chf.beta (chf.beta.visits), chf.ace (chf.ace.visits), cad.ace (cad.dm.visits), 
	#							 cad.statin (cad.statin.visits), cad.asp (cad.asp.visits), af.coum (af.visits)
#Patient and Physician covariates:
	#Physician level 
		#Binary (0/1): EHR, demog, escrip, etest, enotes, result, employ (1 = physician is owner), gp (1 = generalist), solo (1 = solo)		#Categories: office (1 = private solo/group, 2 = free standing clinic, 3 = community health center, 4 = other)
		#			 owner (1 = physician, 2 = HMO or health corp, 3 = other), 
		#			 private (1 = <50% private insur., 2 = >50%)
	#Patient level:  
		#Binary (0/1): urban, white, male
		#Categories: payment (1 = private, 2 = medicare, 3 = medicaid/underinsured, 4 = other)
		#			 age.cat (1 = <45 yo, 2 = 45-64 yo, 3 = >65 yo)
		#			 numchron (0, 1, 2, 3, 4 = 4+ conditions)

	
#Pick a quality measure and use this for getting quantities of interest from imputed datasets

#Load imputed datasets
load("namcs_mi_4-25-10.RData")

namcs_match <-  qm.multi.match <- sample <- list()


#The annoying part of this analysis:
	#Need to repeat the following code for each quality measure/matching treatment combination

vars <- names(namcs_mi$imput[[1]])[c(15, 18, 20, 21, 54:93)]
#Create 5 matched imputed datasets
for(i in 1:5){

										  #Change this to the right denominator
	qm_namcs <- namcs_mi$imput[[i]][namcs_mi$imput[[i]]$chf.beta.visits, vars]
	namcs_match[[i]] <- qm_factor(qm_namcs, type="factor")
	#Matched analysis:
						#Change this to the right matching varible
		match <- matchit(office ~ EHR + demog + etest + result + escrip + remind +							solo + owner + gp + private +
							urban + white + male +
							payment + age.cat + numchron +
							patwt + physwt, 
					     	data = qm_namcs, match="cem")
		namcs_match[[i]] <- match.data(match)
		
		sample[[i]] <- table(namcs_match[[i]]$solo)
		print(i)
}
#This is the sample size for treatment group (approximately)
mean(unlist(lapply(sample, mean)))

namcs_zelig <- mi(namcs_match[[1]], namcs_match[[2]], namcs_match[[3]], namcs_match[[4]], namcs_match[[5]])
					#Change this to the right numerator
qm.multi.match <- zelig(chf.beta ~ EHRscore + demog + escrip + etest + enotes + result + remind + 
								   gp + solo + office + owner + private +
								   urban + white + male +
								   payment + age.cat + numchron, 
						data=namcs_zelig, model = "logit.survey", ids=~cpsum, strata=~cstratm, weights=~patwt, nest=TRUE)
#Check model
#summary(qm.multi.match)						

							#Change this to the right matching variable
							
no_treat <- setx(qm.multi.match, office=0)
treat <- setx(qm.multi.match, office=1)
fd <- sim(qm.multi.match, x = no_treat, x1 = treat, num = 10000)
summary(fd)





######Appedix Code########


#Unmatched analysis:
		#qm.uni.nomatch <- svyby(~chf.beta, by=~EHR, design=namcs_svy, svyratio, denominator=~chf.beta.visits)
	#P-value for difference:
		#qm.uni.nomatch.p <- svychisq(~payment+gp+af.coum, namcs_svy, statistic="Wald")
	#Umatched multivariate:
		#qm.multi.nomatch <- zelig(chf.beta ~ office + owner  + solo + employ + gp + age +  payment + sex + race + gp*payment, 
	    #							  data= qm_namcs, model = "logit.survey", 
	    #							  ids=~cpsum, strata=~cstratm, weights=~patwt, nest=TRUE)
		
#If simulating quantities of interest: (need to change logit_qi function for desired variable)
		#qi.nomatch <- logit_qi(qm.multi.nomatch, type = "gp")
		#qi.match <- logit_qi(qm.multi.match, type = "gp")
		
sample <- c()		
for (i in 1:5){		
		sample[i]<- sum(table(namcs_match[[i]]$htn.measure, namcs_match[[i]]$EHR)[,2])
		namcs_svymatch <- svydesign(ids=~cpsum, strata=~cstratm, data=namcs_match[[i]], weights=~patwt, nest=TRUE)
		qm.uni.match[[i]] <- svyby(~htn.measure, by=~EHR, design=namcs_svymatch, svyratio, denominator=~htn.visits)
		print(i)
		#qm.uni.match.p[[i]] <- svychisq(~chf.beta+EHR, namcs_svymatch, statistic="Wald")
}

qm.uni.match <- qm.uni.match.p <- list()
#Calculate imputed mean
qm.uni.mean <- (qm.uni.match[[1]][,2] + qm.uni.match[[2]][,2] + qm.uni.match[[3]][,2] + 
			    qm.uni.match[[4]][,2] + qm.uni.match[[5]][,2])/5 
			    
#Calculate SE in two steps
qm.uni.se1 <-  ((qm.uni.match[[1]][,3]^2 + qm.uni.match[[2]][,3]^2 + qm.uni.match[[3]][,3]^2 + 
			     qm.uni.match[[4]][,3]^2 + qm.uni.match[[5]][,3]^2)/5)
qm.uni.se2 <-    (((qm.uni.match[[1]][,2]-qm.uni.mean)^2 + (qm.uni.match[[2]][,2]-qm.uni.mean)^2 + 
				   (qm.uni.match[[3]][,2]-qm.uni.mean)^2 + (qm.uni.match[[4]][,2]-qm.uni.mean)^2 +
			       (qm.uni.match[[5]][,2]-qm.uni.mean)^2)/4)*(1+(1/5))
qm.uni.se <- sqrt(qm.uni.se1 + qm.uni.se2)			        			        	
uni.output <- c(mean(sample), qm.uni.mean, qm.uni.se)		



		
		
		
		
		
		